Data Exploration
Projects per year
# Bar chart: number of projects in each year (geom_bar counts rows by default).
dat %>%
  ggplot(aes(x = year)) +
  geom_bar() +
  labs(title = "Number of projects per year")

There are very few projects before 2013, so to keep the model a little simpler we remove projects from 2012 and earlier.
# Keep only projects from 2013 onwards.
dat <- dat[dat$year >= 2013, ]
Projects per month
# Treat month as categorical so every month gets its own bar.
dat$month <- factor(dat$month)
# Labels are applied positionally to the factor levels; month.abb is the
# built-in vector "Jan", "Feb", ..., "Dec".
ggplot(dat, aes(x = month)) +
  geom_bar() +
  scale_x_discrete(labels = month.abb) +
  labs(title = "Counts in Months")

Interestingly, there are a ton of January projects. This might have to do with scraping the site which lists projects chronologically.
Projects per day of the week
# Treat day of week as categorical so every day gets its own bar.
dat$day_of_week <- factor(dat$day_of_week)
# Labels are applied positionally to the factor levels.
# NOTE(review): assumes the first level corresponds to Monday -- confirm.
weekday_labels <- c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
ggplot(dat, aes(x = day_of_week)) +
  geom_bar() +
  scale_x_discrete(labels = weekday_labels) +
  labs(title = "Counts in Days of the Week")

Projects from the US
# Bar chart of the from_US indicator, shown as a categorical variable.
dat %>%
  ggplot(aes(x = factor(from_US))) +
  geom_bar() +
  labs(x = "From US", title = "Number of projects from US")

Most projects are US-based, so we reduce the model complexity by restricting the data to US projects.
# Keep only the rows flagged as US projects.
dat <- dat[dat$from_US == 1, ]
Project goals, text lengths, and cancer types
We scale/z-score the continuous variables according to how we wish to interpret the coefficients of the model. From Andrew Gelman: “Standardizing puts things on an approximately common scale …. (Standardize for) comparing coefficients for different predictors within a model”. Binary and categorical variables are left as is.
# Z-score each continuous predictor and plot the density of its raw values.
# scale() returns a one-column matrix carrying "scaled:center"/"scaled:scale"
# attributes; as.numeric() stores a plain numeric vector in the data frame
# instead of a matrix column. The standardized values themselves are unchanged.
# NOTE(review): the scaling uses the rows present at this point, i.e. before the
# updates/shares outlier filters applied further down -- confirm that is intended.
dat$goal_sc <- as.numeric(scale(dat$goal))
dat %>%
  ggplot() +
  labs(title = "Goal Amount Distribution") +
  geom_density(aes(goal))

dat$duration_float_sc <- as.numeric(scale(dat$duration_float))
dat %>%
  ggplot() +
  labs(title = "Duration Distribution") +
  geom_density(aes(duration_float))

dat$text_length_words_sc <- as.numeric(scale(dat$text_length_words))
dat %>%
  ggplot() +
  labs(title = "Text Length Distribution") +
  geom_density(aes(text_length_words))

dat$photos_sc <- as.numeric(scale(dat$photos))
dat %>%
  ggplot() +
  labs(title = "Photos Distribution") +
  geom_density(aes(photos))

dat$updates_sc <- as.numeric(scale(dat$updates))
dat %>%
  ggplot() +
  labs(title = "Updates Distribution") +
  geom_density(aes(updates))

dat$shares_sc <- as.numeric(scale(dat$shares))
dat %>%
  ggplot() +
  labs(title = "FB Shares Distribution") +
  geom_density(aes(shares))

dat$comments_sc <- as.numeric(scale(dat$comments))
dat %>%
  ggplot() +
  labs(title = "Comments Distribution") +
  geom_density(aes(comments))

dat$friends_sc <- as.numeric(scale(dat$friends))
dat %>%
  ggplot() +
  labs(title = "FB Friends Distribution") +
  geom_density(aes(friends))

# Cancer type is categorical and left unscaled; show counts per type with
# rotated axis labels so the names stay readable.
dat %>%
  ggplot() +
  labs(title = "Cancer Type Counts") +
  geom_bar(aes(x = cancer_type)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

For now, we simply remove some far outliers. We are still looking for a more methodical way to do this.
# Disabled filters on goal and photos, kept for reference:
# nrow(dat[dat$goal > 4e5, ])
# dat = dat[dat$goal <= 4e5, ]
#
# nrow(dat[dat$photos > 50, ])
# dat = dat[dat$photos <= 50, ]

# Count, then drop, projects with more than 200 updates.
nrow(dat[dat$updates > 200, ])
## [1] 1
dat <- dat[dat$updates <= 200, ]

# Count, then drop, projects with more than 20000 shares.
nrow(dat[dat$shares > 20000, ])
## [1] 1
dat <- dat[dat$shares <= 20000, ]
Status
# Number of rows remaining in dat after the filters applied so far.
nrow(dat)
## [1] 3134
In total, we have 3134 IID samples to work with.
# Density of each continuous predictor, split by project status. The 0/1
# status codes are relabelled "Failed"/"Successful" for the legend.
ggplot(dat) +
  geom_density(
    aes(x = goal,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "Goal Amount Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = duration_float,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "Duration Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = text_length_words,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "Text Length Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = photos,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "Photos Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = updates,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "Updates Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = friends,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "FB Friends Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = comments,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "Comments Distribution") +
  theme_minimal()

ggplot(dat) +
  geom_density(
    aes(x = shares,
        color = fct_recode(factor(status), "Successful" = "1", "Failed" = "0"))
  ) +
  guides(color = guide_legend(title = "Status")) +
  labs(title = "FB Shares Distribution") +
  theme_minimal()

# Counts per cancer type with side-by-side bars for each status.
ggplot(dat) +
  geom_bar(
    aes(x = cancer_type,
        fill = fct_recode(factor(status), "Successful" = "1", "Failed" = "0")),
    position = "dodge"
  ) +
  guides(fill = guide_legend(title = "Status")) +
  labs(title = "Cancer Type Counts") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Removed all random-effect variables except year because they didn’t help explain any variance in the data beyond what the residuals could capture. Year is a reasonable random effect as well (see Variance Components, Searle et al. 2006).
Nesting and Chi2 differences: https://www.psychologie.uzh.ch/dam/jcr:ffffffff-b371-2797-0000-00000fda8f29/chisquare_diff_en.pdf
Model status
# Starting specification for project status: all candidate fixed effects plus a
# (1 | year) random-effect term.
formula <- status ~ shares_sc + friends_sc + comments_sc + updates_sc +
  photos_sc + goal_sc + text_length_words_sc + duration_float_sc +
  cancer_type + month + day_of_week + (1 | year)
mod <- glmer(formula, data = dat, family = "binomial")

# Backward step: drop duration_float_sc and compare the reduced fit to the
# fuller one.
formula <- update(formula, ~ . - duration_float_sc)
new.mod <- glmer(formula, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## new.mod: photos_sc + goal_sc + text_length_words_sc + cancer_type +
## new.mod: month + day_of_week + (1 | year)
## mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## mod: photos_sc + goal_sc + text_length_words_sc + duration_float_sc +
## mod: cancer_type + month + day_of_week + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 45 2437.7 2710.0 -1173.9 2347.7
## mod 46 2437.2 2715.5 -1172.6 2345.2 2.5177 1 0.1126
# Accept the reduced model as the new reference, then try dropping cancer_type.
mod <- new.mod
formula <- update(formula, ~ . - cancer_type)
new.mod <- glmer(formula, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## new.mod: photos_sc + goal_sc + text_length_words_sc + month + day_of_week +
## new.mod: (1 | year)
## mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## mod: photos_sc + goal_sc + text_length_words_sc + cancer_type +
## mod: month + day_of_week + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 26 2429.0 2586.3 -1188.5 2377.0
## mod 45 2437.7 2710.0 -1173.9 2347.7 29.25 19 0.06215 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Accept the reduced model as the new reference, then try dropping day_of_week.
mod <- new.mod
formula <- update(formula, ~ . - day_of_week)
new.mod <- glmer(formula, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## new.mod: photos_sc + goal_sc + text_length_words_sc + month + (1 |
## new.mod: year)
## mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## mod: photos_sc + goal_sc + text_length_words_sc + month + day_of_week +
## mod: (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 20 2419.7 2540.7 -1189.8 2379.7
## mod 26 2429.0 2586.3 -1188.5 2377.0 2.6772 6 0.8481
# Accept the reduced model as the new reference, then try dropping friends_sc.
mod <- new.mod
formula <- update(formula, ~ . - friends_sc)
new.mod <- glmer(formula, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: status ~ shares_sc + comments_sc + updates_sc + photos_sc + goal_sc +
## new.mod: text_length_words_sc + month + (1 | year)
## mod: status ~ shares_sc + friends_sc + comments_sc + updates_sc +
## mod: photos_sc + goal_sc + text_length_words_sc + month + (1 |
## mod: year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 19 2418.7 2533.7 -1190.4 2380.7
## mod 20 2419.7 2540.7 -1189.8 2379.7 1.0706 1 0.3008
# Accept the reduced model as the new reference, then try dropping month.
mod <- new.mod
formula <- update(formula, ~ . - month)
new.mod <- glmer(formula, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: status ~ shares_sc + comments_sc + updates_sc + photos_sc + goal_sc +
## new.mod: text_length_words_sc + (1 | year)
## mod: status ~ shares_sc + comments_sc + updates_sc + photos_sc + goal_sc +
## mod: text_length_words_sc + month + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 8 2412.2 2460.6 -1198.1 2396.2
## mod 19 2418.7 2533.7 -1190.4 2380.7 15.407 11 0.1646
# Accept the reduced model as the new reference, then try dropping photos_sc.
mod <- new.mod
formula <- update(formula, ~ . - photos_sc)
new.mod <- glmer(formula, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year)
## mod: status ~ shares_sc + comments_sc + updates_sc + photos_sc + goal_sc +
## mod: text_length_words_sc + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 7 2410.8 2453.2 -1198.4 2396.8
## mod 8 2412.2 2460.6 -1198.1 2396.2 0.7079 1 0.4001
# Keep the last reduced fit as the base model for the status analysis.
mod <- new.mod
The final base model before adding metaphor variables:
# Print the fitted base model held in mod (coefficients and random effects).
summary(mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## (1 | year)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 2410.9 2453.2 -1198.4 2396.9 3127
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.1811 -0.4439 -0.3801 -0.2119 12.1713
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.0377 0.1942
## Number of obs: 3134, groups: year, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.00678 0.11162 -17.979 < 2e-16 ***
## shares_sc 0.59186 0.09574 6.182 6.33e-10 ***
## comments_sc 0.18564 0.05833 3.183 0.001458 **
## updates_sc -0.31525 0.08807 -3.579 0.000344 ***
## goal_sc -1.49982 0.17424 -8.608 < 2e-16 ***
## text_length_words_sc 0.11187 0.05245 2.133 0.032940 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) shrs_s cmmnt_ updts_ gol_sc
## shares_sc -0.161
## comments_sc -0.148 -0.312
## updates_sc 0.052 -0.215 -0.218
## goal_sc 0.411 -0.394 -0.074 0.024
## txt_lngth__ -0.073 -0.015 0.027 -0.146 -0.148
The probability of project success increases with increases in shares, comments, and text length. It decreases with increases in updates and goal amounts.
Add metaphors
Use nAGQ=0 because nAGQ=1 cannot converge in a reasonable number (10,000) of iterations
See https://stats.stackexchange.com/questions/77313/why-cant-i-match-glmer-family-binomial-output-with-manual-implementation-of-g and https://www.rdocumentation.org/packages/lme4/versions/1.1-19/topics/glmer
# Add no_metaphor to the base formula (formula itself is left unchanged) and
# compare the extended fit against the base model mod.
formula.temp <- update(formula, ~ . + no_metaphor)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + no_metaphor
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2412.1 2460.5 -1198.0 2396.1 0.7659 1 0.3815
# Add any_metaphor to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + any_metaphor)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + any_metaphor
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2412.1 2460.5 -1198.0 2396.1 0.7659 1 0.3815
# Add dom_journey to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + dom_journey)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + dom_journey
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2410.2 2458.6 -1197.1 2394.2 2.6107 1 0.1061
# Add dom_journey and journey_prod together to the base formula and compare
# against the base model mod (a 2-df comparison).
formula.temp <- update(formula, ~ . + dom_journey + journey_prod)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + dom_journey + journey_prod
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 9 2410.1 2464.5 -1196.0 2392.1 4.7736 2 0.09192 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add dom_battle to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + dom_battle)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + dom_battle
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2409.8 2458.2 -1196.9 2393.8 3.0282 1 0.08183 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add dom_battle and battle_prod together to the base formula and compare
# against the base model mod (a 2-df comparison).
formula.temp <- update(formula, ~ . + dom_battle + battle_prod)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + dom_battle + battle_prod
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 9 2411.6 2466.0 -1196.8 2393.6 3.297 2 0.1923
# Add only_battle to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + only_battle)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + only_battle
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2411.9 2460.3 -1198.0 2395.9 0.9508 1 0.3295
# Add only_journey to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + only_journey)
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + only_journey
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2407.5 2455.9 -1195.8 2391.5 5.3344 1 0.02091 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add battle_salience (standardized inside the formula) to the base formula and
# compare against the base model mod.
formula.temp <- update(formula, ~ . + scale(battle_salience))
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + scale(battle_salience)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2411.4 2459.8 -1197.7 2395.4 1.4063 1 0.2357
# Add journey_salience (standardized inside the formula) to the base formula
# and compare against the base model mod.
formula.temp <- update(formula, ~ . + scale(journey_salience))
new.mod <- glmer(formula.temp, data = dat, family = "binomial")
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## mod: (1 | year)
## new.mod: status ~ shares_sc + comments_sc + updates_sc + goal_sc + text_length_words_sc +
## new.mod: (1 | year) + scale(journey_salience)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 7 2410.8 2453.2 -1198.4 2396.8
## new.mod 8 2412.5 2460.9 -1198.2 2396.5 0.3963 1 0.529
The probability of project success decreases when journey is the only metaphor family present (beta = -0.65701).
Number of Backers
library(glmmTMB)
# Count the projects with more than 1200 backers.
nrow(dat[dat$backers > 1200, ])
## [1] 17
# Density of backers for projects below 1200 backers. The subset is passed as
# the plot data and mapped by column name, rather than indexing dat$backers
# inside aes(), which bypasses the data argument and is discouraged by ggplot2.
ggplot(dat[dat$backers < 1200, ], aes(x = backers)) +
  labs(x = "Number of Backers", title = "Number of Backers Density") +
  geom_density()

We limit to 1200 because removing the outliers leaves us with a nicely shaped distribution.
# Working copy for the backers analysis: rows with fewer than 1200 backers.
# NOTE(review): the count above uses `> 1200` while this filter uses `< 1200`,
# so a project with exactly 1200 backers would be dropped without being counted.
dat.b <- dat[dat$backers < 1200, ]
Run a quick data dispersion test (see Rice 1995):
# Complete-case copy of dat.b (kept because later code may use it).
s <- na.omit(dat.b)

# Upper-tail chi-square p-value for the statistic
#   2 * sum(x * log(x / mean(x)))  with  length(x) - 1  degrees of freedom.
# Projects with zero backers need special handling: R evaluates
# 0 * log(0) as 0 * -Inf = NaN, which turns the whole sum (and the p-value)
# into NaN. The term's limiting value as x -> 0 is 0, so it is set explicitly.
backers_obs <- dat.b$backers[!is.na(dat.b$backers)]
lr_terms <- backers_obs * log(backers_obs / mean(backers_obs))
lr_terms[backers_obs == 0] <- 0
pchisq(2 * sum(lr_terms), df = length(backers_obs) - 1, lower.tail = FALSE)
# NOTE: the previously recorded output was "[1] NaN", caused by the zero-backer
# rows; re-run this chunk to refresh the recorded p-value.
H0: The data are fit well by a Poisson distribution. H1: The Poisson distribution fails to fit the data well.
The Poisson distribution obviously does not fit the data well since p approx 0. Let’s use a NegBin instead, which can account for differences in the mean and variance.
# Scatter plots of backers against each continuous predictor.
# alpha is set as a fixed geom parameter (outside aes()) so the points are
# actually drawn at 10% opacity; inside aes() the constant was treated as a
# data mapping, which left the points nearly opaque and added a meaningless
# "alpha" legend.
dat.b %>%
  ggplot(aes(goal, backers)) +
  labs(title = "Goal Amount Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(duration_float, backers)) +
  labs(title = "Duration Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(text_length_words, backers)) +
  labs(title = "Text Length Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(photos, backers)) +
  labs(title = "Photos Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(updates, backers)) +
  labs(title = "Updates Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(friends, backers)) +
  labs(title = "FB Friends Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(comments, backers)) +
  labs(title = "Comments Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat.b %>%
  ggplot(aes(shares, backers)) +
  labs(title = "FB Shares Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

# Box plot of backers for each cancer type, with rotated axis labels so the
# type names stay readable.
ggplot(dat.b, aes(x = cancer_type, y = backers)) +
  geom_boxplot() +
  labs(title = "Cancer Types") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Model backers
# Starting specification for the number of backers: all candidate fixed effects
# plus a (1 | year) random-effect term, fitted with the nbinom2 family.
formula <- backers ~ shares_sc + friends_sc + comments_sc + updates_sc +
  photos_sc + goal_sc + text_length_words_sc + duration_float_sc +
  cancer_type + month + day_of_week + (1 | year)
mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")

# Backward step: drop friends_sc and compare the reduced fit to the fuller one.
formula <- update(formula, ~ . - friends_sc)
new.mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + comments_sc + updates_sc + photos_sc + , zi=~0, disp=~1
## new.mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type + , zi=~0, disp=~1
## new.mod: month + day_of_week + (1 | year), zi=~0, disp=~1
## mod: backers ~ shares_sc + friends_sc + comments_sc + updates_sc + , zi=~0, disp=~1
## mod: photos_sc + goal_sc + text_length_words_sc + duration_float_sc + , zi=~0, disp=~1
## mod: cancer_type + month + day_of_week + (1 | year), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 46 33355 33633 -16631 33263
## mod 47 33354 33638 -16630 33260 2.235 1 0.1349
# Accept the reduced model as the new reference, then try dropping comments_sc.
mod <- new.mod
formula <- update(formula, ~ . - comments_sc)
new.mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + day_of_week + (1 | , zi=~0, disp=~1
## new.mod: year), zi=~0, disp=~1
## mod: backers ~ shares_sc + comments_sc + updates_sc + photos_sc + , zi=~0, disp=~1
## mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type + , zi=~0, disp=~1
## mod: month + day_of_week + (1 | year), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 45 33355 33627 -16632 33265
## mod 46 33355 33633 -16631 33263 1.8831 1 0.17
# Accept the reduced model as the new reference, then try dropping day_of_week.
mod <- new.mod
formula <- update(formula, ~ . - day_of_week)
new.mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + day_of_week + (1 | , zi=~0, disp=~1
## mod: year), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 39 33344 33579 -16633 33266
## mod 45 33355 33627 -16632 33265 1.1865 6 0.9775
# Keep the last reduced fit as the base model for backers and print it.
mod <- new.mod
summary(mod)
## Family: nbinom2 ( log )
## Formula:
## backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc +
## duration_float_sc + cancer_type + month + (1 | year)
## Data: dat.b
##
## AIC BIC logLik deviance df.resid
## 33343.7 33579.4 -16632.8 33265.7 3078
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.006106 0.07814
## Number of obs: 3117, groups: year, 7
##
## Overdispersion parameter for nbinom2 family (): 1.42
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.26630 0.08169 52.23 < 2e-16 ***
## shares_sc 0.93718 0.04073 23.01 < 2e-16 ***
## updates_sc -0.07158 0.02086 -3.43 0.000601 ***
## photos_sc 0.11464 0.02160 5.31 1.11e-07 ***
## goal_sc 0.35423 0.02668 13.28 < 2e-16 ***
## text_length_words_sc 0.12422 0.01785 6.96 3.44e-12 ***
## duration_float_sc 0.09042 0.03070 2.95 0.003226 **
## cancer_typebone cancer 0.28649 0.08598 3.33 0.000862 ***
## cancer_typebrain cancer 0.29941 0.09435 3.17 0.001508 **
## cancer_typebreast cancer 0.48875 0.07528 6.49 8.47e-11 ***
## cancer_typecervical cancer -0.05462 0.16988 -0.32 0.747831
## cancer_typecolon cancer 0.26930 0.15204 1.77 0.076514 .
## cancer_typeesophageal cancer 0.38539 0.10136 3.80 0.000143 ***
## cancer_typegeneral 0.36524 0.07224 5.06 4.28e-07 ***
## cancer_typekidney cancer 0.17013 0.08615 1.97 0.048272 *
## cancer_typeleukemia 0.57304 0.09553 6.00 1.99e-09 ***
## cancer_typeliver cancer -0.33366 0.10855 -3.07 0.002114 **
## cancer_typelung cancer -0.20587 0.08184 -2.52 0.011883 *
## cancer_typelymphoma 0.39472 0.07942 4.97 6.69e-07 ***
## cancer_typemelanoma 0.05840 0.10693 0.55 0.584957
## cancer_typemixed 0.16258 0.07729 2.10 0.035416 *
## cancer_typeneuroblastoma 0.27239 0.12386 2.20 0.027866 *
## cancer_typepancreatic cancer 0.48547 0.19220 2.53 0.011540 *
## cancer_typeprostate cancer 0.19192 0.20416 0.94 0.347213
## cancer_typeskin cancer -0.06833 0.09641 -0.71 0.478501
## cancer_typetesticular cancer 0.49735 0.13613 3.65 0.000259 ***
## month2 -0.05466 0.06589 -0.83 0.406772
## month3 -0.04692 0.07919 -0.59 0.553476
## month4 -0.10039 0.08081 -1.24 0.214105
## month5 -0.18418 0.07531 -2.45 0.014455 *
## month6 -0.17296 0.07949 -2.18 0.029558 *
## month7 -0.13106 0.07446 -1.76 0.078409 .
## month8 0.02658 0.07702 0.35 0.730068
## month9 -0.02184 0.07570 -0.29 0.772985
## month10 -0.20660 0.07358 -2.81 0.004989 **
## month11 -0.09431 0.07414 -1.27 0.203366
## month12 0.04379 0.07494 0.58 0.559034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Add metaphors
# Add no_metaphor to the base formula (formula itself is left unchanged) and
# compare the extended fit against the base model mod.
formula.temp <- update(formula, ~ . + no_metaphor)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + no_metaphor, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33316 33558 -16618 33236 29.497 1 5.6e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add any_metaphor to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + any_metaphor)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + any_metaphor, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33316 33558 -16618 33236 29.497 1 5.6e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add dom_journey to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + dom_journey)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_journey, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33344 33585 -16632 33264 2.0608 1 0.1511
# Add dom_journey and journey_prod together to the base formula and compare
# against the base model mod (a 2-df comparison).
formula.temp <- update(formula, ~ . + dom_journey + journey_prod)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_journey + , zi=~0, disp=~1
## new.mod: journey_prod, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 41 33345 33593 -16632 33263 2.7478 2 0.2531
# Add dom_battle to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + dom_battle)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_battle, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33328 33570 -16624 33248 17.702 1 2.583e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add dom_battle and battle_prod together to the base formula and compare
# against the base model mod (a 2-df comparison).
formula.temp <- update(formula, ~ . + dom_battle + battle_prod)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_battle + , zi=~0, disp=~1
## new.mod: battle_prod, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 41 33328 33576 -16623 33246 19.984 2 4.576e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add only_battle to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + only_battle)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + only_battle, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33329 33570 -16624 33249 17.105 1 3.536e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add only_journey to the base formula and compare against the base model mod.
formula.temp <- update(formula, ~ . + only_journey)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + only_journey, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33345 33587 -16633 33265 0.2586 1 0.6111
# Add battle_salience (standardized inside the formula) to the base formula and
# compare against the base model mod.
formula.temp <- update(formula, ~ . + scale(battle_salience))
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + scale(battle_salience), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33325 33567 -16622 33245 20.878 1 4.896e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add journey_salience (standardized inside the formula) to the base formula
# and compare against the base model mod.
formula.temp <- update(formula, ~ . + scale(journey_salience))
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + scale(journey_salience), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 33344 33579 -16633 33266
## new.mod 40 33339 33581 -16630 33259 6.2959 1 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
On average, the absence of metaphors lowers the expected number of backers by 17.3%. When battle metaphors are dominant, the expected number of backers increases by 13.1%, and each additional unit of battle productivity raises it by a further 1.1%. When only battle metaphors are present, we see an increase of 12.9%. A one-standard-deviation increase in battle salience (the predictor is z-scored) leads to a 7.2% increase in the expected number of backers, and similarly, a one-standard-deviation increase in journey salience leads to a 3.8% increase. Because the model uses a log link, these percentages are multiplicative effects on the expected count itself, not changes in log(backers).
Mean Donation
# Kernel density of the response, shifted by 1 to match the
# `mean_donation + 1` outcome used in the models below.
ggplot(dat) +
  geom_density(aes(x = mean_donation + 1)) +
  labs(title = "Mean Donation Density")

# Keep only projects whose mean donation is below 500. The explicit
# !is.na() guard matters: with plain logical subsetting, an NA in
# mean_donation yields an NA index, which R turns into a row of all-NA
# values instead of dropping the observation.
dat <- dat[!is.na(dat$mean_donation) & dat$mean_donation < 500, ]

# QQ plot of the shifted response against exponential quantiles, with the
# matching reference line, to judge how close the data are to exponential.
ggplot(dat, aes(sample = mean_donation + 1)) +
  geom_qq(distribution = qexp) +
  geom_qq_line(distribution = qexp)

# Scatter of mean donation against goal amount. Transparency is set as a
# fixed layer parameter; placing alpha = 0.1 inside aes() would map the
# constant through the alpha scale (adding a meaningless legend) instead of
# drawing the points at 10% opacity.
dat %>%
  ggplot(aes(goal, mean_donation)) +
  labs(title = "Goal Amount Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

# duration_float is continuous, so an ungrouped geom_boxplot() collapses
# everything into one box and makes ggplot2 warn about a missing group.
# Use the same semi-transparent scatter as the other numeric predictors.
dat %>%
  ggplot(aes(duration_float, mean_donation)) +
  labs(title = "Duration Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

# Mean donation against each remaining numeric predictor. In every plot the
# transparency is set as a fixed layer parameter (alpha = 0.1) rather than
# mapped inside aes(); mapping a constant sends it through the alpha scale
# and adds a spurious legend instead of rendering points at 10% opacity.
dat %>%
  ggplot(aes(text_length_words, mean_donation)) +
  labs(title = "Text Length Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat %>%
  ggplot(aes(photos, mean_donation)) +
  labs(title = "Photos Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat %>%
  ggplot(aes(updates, mean_donation)) +
  labs(title = "Updates Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat %>%
  ggplot(aes(friends, mean_donation)) +
  labs(title = "FB Friends Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat %>%
  ggplot(aes(comments, mean_donation)) +
  labs(title = "Comments Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

dat %>%
  ggplot(aes(shares, mean_donation)) +
  labs(title = "FB Shares Distribution") +
  geom_point(alpha = 0.1) +
  theme_minimal()

# Box plots of mean donation per cancer type. The x-axis labels are rotated
# 60 degrees and right-aligned so the category names do not overlap.
ggplot(dat, aes(x = cancer_type, y = mean_donation)) +
  geom_boxplot() +
  labs(title = "Cancer Types") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

The data fit an exponential fairly well, so we model with Gamma and a log link (see above answer for reason).
Model mean donation
# Full starting model for mean donation: Gamma GLMM with a log link, all
# scaled numeric predictors, the categorical predictors, and a random
# intercept per year. The response is shifted by 1.
formula <- mean_donation + 1 ~
  shares_sc + friends_sc + comments_sc + updates_sc + photos_sc +
  goal_sc + text_length_words_sc + duration_float_sc +
  cancer_type + month + day_of_week + (1 | year)

# NOTE(review): the rendered output reports that this fit failed to
# converge; trying another optimizer / more iterations via glmerControl()
# may be worthwhile before relying on the likelihood-ratio tests.
mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0350786
## (tol = 0.001, component 1)
# Backward elimination, step 1: remove friends_sc, refit, and compare the
# reduced model with the current one.
formula <- update(formula, ~ . - friends_sc)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0257135
## (tol = 0.001, component 1)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + comments_sc + updates_sc + photos_sc +
## new.mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type +
## new.mod: month + day_of_week + (1 | year)
## mod: mean_donation + 1 ~ shares_sc + friends_sc + comments_sc + updates_sc +
## mod: photos_sc + goal_sc + text_length_words_sc + duration_float_sc +
## mod: cancer_type + month + day_of_week + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 46 33229 33508 -16569 33137
## mod 47 33230 33514 -16568 33136 1.6779 1 0.1952
# Accept the previous reduction as the current model, then test whether
# month can be removed as well.
mod <- new.mod
formula <- update(formula, ~ . - month)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00790202
## (tol = 0.001, component 1)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + comments_sc + updates_sc + photos_sc +
## new.mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type +
## new.mod: day_of_week + (1 | year)
## mod: mean_donation + 1 ~ shares_sc + comments_sc + updates_sc + photos_sc +
## mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type +
## mod: month + day_of_week + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 35 33213 33425 -16572 33143
## mod 46 33229 33508 -16569 33137 5.7588 11 0.889
# Accept the previous reduction as the current model, then test whether
# day_of_week can be removed as well.
mod <- new.mod
formula <- update(formula, ~ . - day_of_week)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00167218
## (tol = 0.001, component 1)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + comments_sc + updates_sc + photos_sc +
## new.mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type +
## new.mod: (1 | year)
## mod: mean_donation + 1 ~ shares_sc + comments_sc + updates_sc + photos_sc +
## mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type +
## mod: day_of_week + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 29 33204 33379 -16573 33146
## mod 35 33213 33425 -16572 33143 2.5085 6 0.8675
# Accept the previous reduction as the current model, then test whether
# updates_sc can be removed as well.
mod <- new.mod
formula <- update(formula, ~ . - updates_sc)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0076128
## (tol = 0.001, component 1)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + comments_sc + photos_sc + goal_sc +
## new.mod: text_length_words_sc + duration_float_sc + cancer_type +
## new.mod: (1 | year)
## mod: mean_donation + 1 ~ shares_sc + comments_sc + updates_sc + photos_sc +
## mod: goal_sc + text_length_words_sc + duration_float_sc + cancer_type +
## mod: (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 28 33205 33374 -16574 33149
## mod 29 33204 33379 -16573 33146 3.0013 1 0.0832 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Accept the previous reduction as the current model, then test whether
# photos_sc can be removed as well.
mod <- new.mod
formula <- update(formula, ~ . - photos_sc)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00223936
## (tol = 0.001, component 1)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + comments_sc + goal_sc + text_length_words_sc +
## new.mod: duration_float_sc + cancer_type + (1 | year)
## mod: mean_donation + 1 ~ shares_sc + comments_sc + photos_sc + goal_sc +
## mod: text_length_words_sc + duration_float_sc + cancer_type +
## mod: (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 27 33203 33366 -16574 33149
## mod 28 33205 33374 -16574 33149 0.0181 1 0.8929
# Accept the previous reduction as the current model, then test whether
# duration_float_sc can be removed as well.
mod <- new.mod
formula <- update(formula, ~ . - duration_float_sc)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + comments_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year)
## mod: mean_donation + 1 ~ shares_sc + comments_sc + goal_sc + text_length_words_sc +
## mod: duration_float_sc + cancer_type + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 26 33202 33359 -16575 33150
## mod 27 33203 33366 -16574 33149 1.0396 1 0.3079
# Accept the previous reduction as the current model, then test whether
# comments_sc can be removed as well.
mod <- new.mod
formula <- update(formula, ~ . - comments_sc)
new.mod <- glmer(
  formula,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year)
## mod: mean_donation + 1 ~ shares_sc + comments_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 25 33200 33351 -16575 33150
## mod 26 33202 33359 -16575 33150 0.4389 1 0.5076
# The last reduction becomes the baseline mean-donation model; print its
# coefficient table and fit statistics.
mod <- new.mod
summary(mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## cancer_type + (1 | year)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 33200.3 33351.5 -16575.1 33150.3 3103
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8828 -0.5773 -0.1246 0.3953 7.7579
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.002096 0.04578
## Residual 0.276345 0.52569
## Number of obs: 3128, groups: year, 7
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) 4.527300 0.055376 81.755 < 2e-16 ***
## shares_sc -0.078585 0.017280 -4.548 5.42e-06 ***
## goal_sc 0.158141 0.016110 9.816 < 2e-16 ***
## text_length_words_sc 0.045669 0.012690 3.599 0.00032 ***
## cancer_typebone cancer -0.203620 0.063825 -3.190 0.00142 **
## cancer_typebrain cancer -0.062528 0.070690 -0.885 0.37640
## cancer_typebreast cancer -0.003156 0.056105 -0.056 0.95515
## cancer_typecervical cancer -0.100953 0.127291 -0.793 0.42773
## cancer_typecolon cancer -0.124169 0.114422 -1.085 0.27784
## cancer_typeesophageal cancer 0.078053 0.075983 1.027 0.30431
## cancer_typegeneral -0.083379 0.053478 -1.559 0.11896
## cancer_typekidney cancer -0.045654 0.064358 -0.709 0.47810
## cancer_typeleukemia -0.026484 0.070844 -0.374 0.70853
## cancer_typeliver cancer -0.264569 0.080771 -3.276 0.00105 **
## cancer_typelung cancer 0.035749 0.061238 0.584 0.55937
## cancer_typelymphoma -0.017032 0.059498 -0.286 0.77469
## cancer_typemelanoma 0.003561 0.080265 0.044 0.96461
## cancer_typemixed -0.011185 0.058019 -0.193 0.84713
## cancer_typeneuroblastoma -0.045007 0.093290 -0.482 0.62950
## cancer_typepancreatic cancer 0.050632 0.145063 0.349 0.72707
## cancer_typeprostate cancer 0.497038 0.153993 3.228 0.00125 **
## cancer_typeskin cancer -0.193095 0.072112 -2.678 0.00741 **
## cancer_typetesticular cancer -0.249230 0.101225 -2.462 0.01381 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 23 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
Add metaphors
# Extend the baseline mean-donation formula with no_metaphor, refit the
# Gamma/log GLMM, and print the extended model's summary. `formula` itself
# is left untouched so each metaphor predictor is tested against the same
# baseline.
formula.temp <- update(formula, ~ . + no_metaphor)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
summary(new.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## cancer_type + (1 | year) + no_metaphor
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 33182.0 33339.3 -16565.0 33130.0 3102
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8810 -0.5779 -0.1373 0.3965 8.3002
##
## Random effects:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.002106 0.04589
## Residual 0.277191 0.52649
## Number of obs: 3128, groups: year, 7
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) 4.578400 0.056465 81.083 < 2e-16 ***
## shares_sc -0.082532 0.017201 -4.798 1.60e-06 ***
## goal_sc 0.152340 0.016020 9.510 < 2e-16 ***
## text_length_words_sc 0.033991 0.012827 2.650 0.008047 **
## cancer_typebone cancer -0.202444 0.063656 -3.180 0.001471 **
## cancer_typebrain cancer -0.060290 0.070490 -0.855 0.392383
## cancer_typebreast cancer -0.011543 0.055988 -0.206 0.836653
## cancer_typecervical cancer -0.106113 0.126947 -0.836 0.403219
## cancer_typecolon cancer -0.123474 0.114129 -1.082 0.279305
## cancer_typeesophageal cancer 0.065837 0.075822 0.868 0.385224
## cancer_typegeneral -0.083417 0.053340 -1.564 0.117849
## cancer_typekidney cancer -0.041169 0.064192 -0.641 0.521302
## cancer_typeleukemia -0.027666 0.070655 -0.392 0.695383
## cancer_typeliver cancer -0.267404 0.080554 -3.320 0.000902 ***
## cancer_typelung cancer 0.032483 0.061077 0.532 0.594840
## cancer_typelymphoma -0.026415 0.059372 -0.445 0.656387
## cancer_typemelanoma -0.006331 0.080067 -0.079 0.936977
## cancer_typemixed -0.011031 0.057870 -0.191 0.848826
## cancer_typeneuroblastoma -0.053006 0.093044 -0.570 0.568885
## cancer_typepancreatic cancer 0.039690 0.144678 0.274 0.783829
## cancer_typeprostate cancer 0.493283 0.153587 3.212 0.001319 **
## cancer_typeskin cancer -0.180466 0.071965 -2.508 0.012153 *
## cancer_typetesticular cancer -0.234916 0.100993 -2.326 0.020016 *
## no_metaphorTRUE -0.107753 0.023889 -4.511 6.46e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 24 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
# Compare the extended model against the baseline `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + no_metaphor
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33182 33339 -16565 33130 20.241 1 6.828e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline plus any_metaphor, compared against the baseline.
# NOTE(review): the rendered test statistics are identical to the
# no_metaphor comparison, which suggests any_metaphor is simply its logical
# complement -- confirm, and drop one of the two tests if so.
formula.temp <- update(formula, ~ . + any_metaphor)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + any_metaphor
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33182 33339 -16565 33130 20.241 1 6.828e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline plus dom_journey, compared against the baseline.
formula.temp <- update(formula, ~ . + dom_journey)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + dom_journey
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33202 33359 -16575 33150 0.739 1 0.39
# Baseline plus dom_journey and journey_prod together (a joint test of
# both added terms against the baseline).
formula.temp <- update(formula, ~ . + dom_journey + journey_prod)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + dom_journey + journey_prod
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 27 33203 33366 -16574 33149 1.3002 2 0.522
# Baseline plus dom_battle, compared against the baseline.
formula.temp <- update(formula, ~ . + dom_battle)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + dom_battle
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33185 33342 -16567 33133 17.075 1 3.594e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline plus dom_battle and battle_prod together (a joint test of both
# added terms against the baseline).
formula.temp <- update(formula, ~ . + dom_battle + battle_prod)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + dom_battle + battle_prod
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 27 33187 33350 -16566 33133 17.55 2 0.0001546 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline plus only_battle, compared against the baseline.
formula.temp <- update(formula, ~ . + only_battle)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + only_battle
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33187 33344 -16568 33135 15.125 1 0.0001006 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline plus only_journey, compared against the baseline.
formula.temp <- update(formula, ~ . + only_journey)
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + only_journey
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33201 33359 -16575 33149 0.7928 1 0.3733
# Baseline plus z-scored battle_salience, compared against the baseline.
formula.temp <- update(formula, ~ . + scale(battle_salience))
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + scale(battle_salience)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33195 33353 -16572 33143 6.9094 1 0.008575 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline plus z-scored journey_salience, compared against the baseline.
formula.temp <- update(formula, ~ . + scale(journey_salience))
new.mod <- glmer(
  formula.temp,
  data = dat,
  family = Gamma(link = "log")
)
anova(new.mod, mod, test = "Chisq")
## Data: dat
## Models:
## mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## mod: cancer_type + (1 | year)
## new.mod: mean_donation + 1 ~ shares_sc + goal_sc + text_length_words_sc +
## new.mod: cancer_type + (1 | year) + scale(journey_salience)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 25 33200 33351 -16575 33150
## new.mod 26 33199 33356 -16573 33147 3.5842 1 0.05833 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1